# Install the CRAN packages this script needs only when they are missing, so
# that re-running the script does not reinstall them (and does not need network
# access) every time.
if (!requireNamespace("corpcor", quietly = TRUE)) {
  install.packages("corpcor")
}
if (!requireNamespace("mctest", quietly = TRUE)) {
  install.packages("mctest")
}

# library() stops immediately when a package cannot be loaded, whereas
# require() only returns FALSE and lets the script fail later on.
library(mgcv)
library(corpcor)
library(mctest)


# Importing datasets

# Read the full seller data set from the CSV export.
all_sellerdata <- read.csv(
  file = "C:\\Users\\f00456n\\Documents\\Amazon Price Dynamics\\Data Scraped\\New Data\\Modeling data sets\\Electric Cooker\\finalnumsellers_dataJuly2025_ecooker.csv",
  header = TRUE,
  sep = ","
)

# The models use rows 4 to 454 only. Indexing a data frame past its last row
# returns all-NA rows, which the NA -> 0 replacement below would silently turn
# into all-zero observations, so stop if the file is shorter than expected.
stopifnot(nrow(all_sellerdata) >= 454)
modeldata <- all_sellerdata[4:454, ]

# Every missing value in the modelling window is replaced by zero.
modeldata[is.na(modeldata)] <- 0

# The rest of the script refers to the columns of modeldata by bare name, so
# the data frame has to stay attached to the search path.
attach(modeldata)

# Breville: number of sellers

# Candidate predictors for the Breville model, bound column-wise into a data
# frame. The names are looked up as bare variables (presumably the columns of
# the attached modeldata). Column order is significant and kept as is.
data_numsellers_brev <- data.frame(cbind(
  # incr* columns
  incr10per_amzn_brev, incr10per_clust1_brev, incr10per_clust4_brev,
  incr20per_amzn_brev, incr20per_clust1_brev, incr20per_clust4_brev,
  incr5per_amzn_brev, incr5per_clust1_brev, incr5per_clust4_brev,
  # decr* columns
  decr10per_amzn_brev, decr10per_clust1_brev, decr10per_clust4_brev,
  decr20per_amzn_brev, decr20per_clust1_brev, decr20per_clust4_brev,
  decr5per_amzn_brev, decr5per_clust1_brev, decr5per_clust4_brev,
  # lagged buy-box price, cluster count, brand columns, weekend
  buyboxprice_brv_lag1, num_brvclust2_lag1,
  brv_bottombrandlag1, brv_topbrandlag1,
  weekend,
  # more additional variables - in absence of review text
  priceAmzn_Brev_lag, nonbbox3pprice_brev_lag,
  Answered_Questionsbrv_lag1, Product_reviews_brv_lag1,
  product_star_brv_lag1,
  salesrank_cat_brv_lag1, salesrank_subcat_brv_lag1,
  num_brvused3p_lag1,
  seasonal_sale
))

# VARIANCE CHECK
# Sample variance of every candidate predictor, computed column by column.
variance_X <- vapply(data_numsellers_brev, stats::var, numeric(1))

# Number of variables with non-zero variance.
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance. drop = FALSE keeps the result a
# data frame even when only one column survives the filter.
trainX_retained <- data_numsellers_brev[, variance_X != 0, drop = FALSE]

data_numsellers_brev <- trainX_retained


# Breville: Gaussian GAM (identity link) for the number of sellers, fitted by
# REML. Every term is parametric; the formula contains no smooth terms.
#
# Candidate terms currently disabled in this model:
#   decr10per_clust4_brev, decr20per_amzn_brev, decr20per_clust4_brev,
#   decr5per_clust4_brev, incr10per_amzn_brev, incr20per_amzn_brev,
#   incr20per_clust4_brev
#
# NOTE(review): the response NUM_SELLERS_Breville is not one of the columns
# assembled into data_numsellers_brev, so gam() has to resolve it outside
# `data` (presumably through attach(modeldata)). The same fallback would apply
# to any predictor removed by the variance check -- confirm this is intended.
gam_numsellers_brev <- gam(
  NUM_SELLERS_Breville ~
    decr10per_amzn_brev +
    decr5per_amzn_brev +
    incr10per_clust4_brev +
    incr5per_amzn_brev +
    incr5per_clust4_brev +
    buyboxprice_brv_lag1 +
    priceAmzn_Brev_lag +
    nonbbox3pprice_brev_lag +
    brv_bottombrandlag1 +
    weekend +
    Answered_Questionsbrv_lag1 +
    Product_reviews_brv_lag1 +
    product_star_brv_lag1 +
    salesrank_cat_brv_lag1 +
    salesrank_subcat_brv_lag1 +
    num_brvused3p_lag1 +
    seasonal_sale,
  family = gaussian(link = "identity"),
  data = data_numsellers_brev,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

summary(gam_numsellers_brev)



# Cosori: number of sellers

# Candidate predictors for the Cosori model, bound column-wise into a data
# frame. The names are looked up as bare variables (presumably the columns of
# the attached modeldata). Column order is significant and kept as is.
data_numsellers_cosori <- data.frame(cbind(
  # incr* columns
  incr10per_amzn_csri, incr20per_amzn_csri, incr5per_amzn_csri,
  # decr* columns
  decr10per_amzn_csri, decr20per_amzn_csri, decr5per_amzn_csri,
  # lagged buy-box price, brand columns, weekend
  buyboxprice_cosori_lag1,
  csri_bottombrandlag1, csri_topbrandlag1,
  weekend,
  # more additional variables - in absence of review text
  priceAmzn_csri_lag, nonbbox3pprice_csri_lag,
  Product_reviews_cosori_lag1, product_star_cosori_lag1,
  salesrank_cat_cosori_lag1, salesrank_subcat_cosori_lag1,
  num_Cosoriused3p_lag1,
  seasonal_sale
))


# VARIANCE CHECK
# Sample variance of every candidate predictor, computed column by column.
variance_X <- vapply(data_numsellers_cosori, stats::var, numeric(1))

# Number of variables with non-zero variance.
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance. drop = FALSE keeps the result a
# data frame even when only one column survives the filter.
trainX_retained <- data_numsellers_cosori[, variance_X != 0, drop = FALSE]

data_numsellers_cosori <- trainX_retained

# Cosori: Gaussian GAM (identity link) for the number of sellers, fitted by
# REML. Every term is parametric; the formula contains no smooth terms.
#
# Candidate terms currently disabled in this model:
#   incr10per_amzn_csri, decr10per_amzn_csri, decr5per_amzn_csri,
#   salesrank_subcat_cosori_lag1
#
# The formula previously had a dangling `+` ahead of `+ weekend`, which R
# parsed as a unary plus (`+weekend`); it is written as a plain term here. The
# set and order of model terms is unchanged.
#
# NOTE(review): the response NUM_SELLERS_Cosori is not one of the columns
# assembled into data_numsellers_cosori, so gam() has to resolve it outside
# `data` (presumably through attach(modeldata)). The same fallback would apply
# to any predictor removed by the variance check -- confirm this is intended.
gam_numsellers_cosori <- gam(
  NUM_SELLERS_Cosori ~
    buyboxprice_cosori_lag1 +
    csri_bottombrandlag1 +
    incr20per_amzn_csri +
    incr5per_amzn_csri +
    decr20per_amzn_csri +
    Product_reviews_cosori_lag1 +
    product_star_cosori_lag1 +
    salesrank_cat_cosori_lag1 +
    priceAmzn_csri_lag +
    nonbbox3pprice_csri_lag +
    weekend +
    seasonal_sale,
  family = gaussian(link = "identity"),
  data = data_numsellers_cosori,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

summary(gam_numsellers_cosori)


# Crock Pot: number of sellers

# Candidate predictors for the Crock Pot model, bound column-wise into a data
# frame. The names are looked up as bare variables (presumably the columns of
# the attached modeldata). Column order is significant and kept as is.
data_numsellers_crockp <- data.frame(cbind(
  # decr* columns for clust3 / clust5
  decr10per_clust3_crockp, decr10per_clust5_crockp,
  decr20per_clust3_crockp, decr20per_clust5_crockp,
  decr5per_clust3_crockp, decr5per_clust5_crockp,
  # incr* columns for clust3 / clust5
  incr10per_clust3_crockp, incr10per_clust5_crockp,
  incr20per_clust3_crockp, incr20per_clust5_crockp,
  incr5per_clust3_crockp, incr5per_clust5_crockp,
  # decr* / incr* columns for amzn
  decr5per_amzn_crockp, decr10per_amzn_crockp, decr20per_amzn_crockp,
  incr20per_amzn_crockp, incr10per_amzn_crockp, incr5per_amzn_crockp,
  # lagged buy-box price, brand columns, weekend
  buyboxprice_crockp_lag1,
  crockp_bottombrandlag1, crockp_topbrandlag1,
  weekend,
  # more additional variables - in absence of review text
  priceclust3_CrockP_lag, priceclust5_CrockP_lag,
  nonbbox3pprice_crockp_lag, priceAmzn_CrockP_lag,
  Product_reviews_crockp_lag1, product_star_crockp_lag1,
  salesrank_cat_crockp_lag1, salesrank_subcat_crockp_lag1,
  num_CrockPused3p_lag1,
  seasonal_sale
))

# VARIANCE CHECK
# Sample variance of every candidate predictor, computed column by column.
variance_X <- vapply(data_numsellers_crockp, stats::var, numeric(1))

# Number of variables with non-zero variance.
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance. drop = FALSE keeps the result a
# data frame even when only one column survives the filter.
trainX_retained <- data_numsellers_crockp[, variance_X != 0, drop = FALSE]

data_numsellers_crockp <- trainX_retained

# Crock Pot: Gaussian GAM (identity link) for the number of sellers, fitted by
# REML. Every term is parametric; the formula contains no smooth terms.
#
# Candidate terms currently disabled in this model:
#   decr10per_clust3_crockp, decr10per_clust5_crockp,
#   decr20per_clust3_crockp, decr20per_clust5_crockp,
#   decr5per_clust3_crockp, decr5per_clust5_crockp,
#   incr10per_clust3_crockp, incr10per_clust5_crockp,
#   incr20per_clust5_crockp, incr5per_clust3_crockp,
#   incr5per_clust5_crockp, decr5per_amzn_crockp, decr10per_amzn_crockp,
#   incr10per_amzn_crockp, incr5per_amzn_crockp, priceclust3_CrockP_lag,
#   salesrank_subcat_crockp_lag1
#
# The formula previously ended in `weekend +` followed by `+ seasonal_sale`,
# which R parsed as a unary plus (`+seasonal_sale`); it is written as a plain
# term here. The set and order of model terms is unchanged.
#
# NOTE(review): the response NUM_SELLERS_CrockP is not one of the columns
# assembled into data_numsellers_crockp, so gam() has to resolve it outside
# `data` (presumably through attach(modeldata)). The same fallback would apply
# to any predictor removed by the variance check -- confirm this is intended.
gam_numsellers_crockp <- gam(
  NUM_SELLERS_CrockP ~
    incr20per_clust3_crockp +
    decr20per_amzn_crockp +
    incr20per_amzn_crockp +
    buyboxprice_crockp_lag1 +
    priceAmzn_CrockP_lag +
    priceclust5_CrockP_lag +
    nonbbox3pprice_crockp_lag +
    num_CrockPused3p_lag1 +
    Product_reviews_crockp_lag1 +
    product_star_crockp_lag1 +
    salesrank_cat_crockp_lag1 +
    weekend +
    seasonal_sale,
  family = gaussian(link = "identity"),
  data = data_numsellers_crockp,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

summary(gam_numsellers_crockp)


# Elite Platinum: number of sellers

# Candidate predictors for the Elite Platinum model, bound column-wise into a
# data frame. The names are looked up as bare variables (presumably the columns
# of the attached modeldata). Column order is significant and kept as is.
data_numsellers_EP <- data.frame(cbind(
  # decr* columns for 3p / amzn
  decr10per_3p_ep, decr10per_amzn_ep,
  decr20per_3p_ep, decr20per_amzn_ep,
  decr5per_3p_ep, decr5per_amzn_ep,
  # incr* columns for 3p / amzn
  incr10per_3p_ep, incr10per_amzn_ep,
  incr20per_3p_ep, incr20per_amzn_ep,
  incr5per_3p_ep, incr5per_amzn_ep,
  # lagged buy-box price, brand columns, weekend
  buyboxprice_EP_lag1,
  ep_bottombrandlag1, ep_topbrandlag1,
  weekend,
  # more additional variables - in absence of review text
  priceAmzn_EP_lag, nonbbox3pprice_EP_lag,
  Product_reviews_EP_lag1, product_star_EP_lag1,
  salesrank_cat_EP_lag1,
  # salesrank_subcat_EP_lag1 is disabled in this set
  num_EPused3p_lag1, num_EPusedamzn_lag1,
  seasonal_sale
))



# VARIANCE CHECK
# Sample variance of every candidate predictor, computed column by column.
variance_X <- vapply(data_numsellers_EP, stats::var, numeric(1))

# Number of variables with non-zero variance.
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance. drop = FALSE keeps the result a
# data frame even when only one column survives the filter.
trainX_retained <- data_numsellers_EP[, variance_X != 0, drop = FALSE]

data_numsellers_EP <- trainX_retained

# Elite Platinum: Gaussian GAM (identity link) for the number of sellers,
# fitted by REML. Every term is parametric; the formula contains no smooth
# terms.
#
# Candidate terms currently disabled in this model:
#   incr10per_3p_ep, incr5per_3p_ep, salesrank_subcat_EP_lag1
#
# The formula previously ended in `weekend +` followed by `+ seasonal_sale`,
# which R parsed as a unary plus (`+seasonal_sale`); it is written as a plain
# term here. The set and order of model terms is unchanged.
#
# NOTE(review): the response NUM_SELLERS_EP is not one of the columns
# assembled into data_numsellers_EP, so gam() has to resolve it outside `data`
# (presumably through attach(modeldata)). The same fallback would apply to any
# predictor removed by the variance check -- confirm this is intended.
gam_numsellers_EP <- gam(
  NUM_SELLERS_EP ~
    decr10per_amzn_ep +
    decr20per_amzn_ep +
    decr5per_3p_ep +
    decr5per_amzn_ep +
    incr10per_amzn_ep +
    incr20per_amzn_ep +
    incr5per_amzn_ep +
    buyboxprice_EP_lag1 +
    nonbbox3pprice_EP_lag +
    priceAmzn_EP_lag +
    ep_bottombrandlag1 +
    num_EPused3p_lag1 +
    Product_reviews_EP_lag1 +
    product_star_EP_lag1 +
    salesrank_cat_EP_lag1 +
    weekend +
    seasonal_sale,
  family = gaussian(link = "identity"),
  data = data_numsellers_EP,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

summary(gam_numsellers_EP)



# Instant Pot: number of sellers

# Candidate predictors for the Instant Pot model, bound column-wise into a data
# frame. The names are looked up as bare variables (presumably the columns of
# the attached modeldata). Column order is significant and kept as is.
data_numsellers_IP <- data.frame(cbind(
  # decr* columns
  decr10per_amzn_ip, decr20per_amzn_ip, decr5per_amzn_ip,
  # incr* columns
  incr10per_amzn_ip, incr20per_amzn_ip, incr5per_amzn_ip,
  # lagged mean buy-box price, brand columns, weekend
  mean_buyboxprice_IP_lag1,
  ip_bottombrandlag1, ip_topbrandlag1,
  weekend,
  # more additional variables - in absence of review text
  priceAmzn_IP_lag, mean_nonbuybox3p_price_IP,
  mean_ans_questions_IP_lag1, mean_Product_reviews_IP_lag1,
  mean_product_star_rate_IP_lag1,
  mean_salesrank_categ_IP_lag1,
  # mean_salesrank_subcat_IP_lag1 is disabled in this set
  NUM_IPused3p_lag1, NUM_IPamzn_lag1,
  seasonal_sale
))
# VARIANCE CHECK
# Sample variance of every candidate predictor, computed column by column.
variance_X <- vapply(data_numsellers_IP, stats::var, numeric(1))

# Number of variables with non-zero variance.
nonzero_var <- sum(variance_X != 0)

# Retaining variables with non-zero variance. drop = FALSE keeps the result a
# data frame even when only one column survives the filter.
trainX_retained <- data_numsellers_IP[, variance_X != 0, drop = FALSE]

data_numsellers_IP <- trainX_retained

# Instant Pot: Gaussian GAM (identity link) for the number of sellers, fitted
# by REML. Every term is parametric; the formula contains no smooth terms.
#
# Candidate terms currently disabled in this model:
#   decr10per_amzn_ip, decr20per_amzn_ip, decr5per_amzn_ip,
#   incr10per_amzn_ip, incr20per_amzn_ip, ip_topbrandlag1, NUM_IPused3p_lag1
#
# NOTE(review): the response NUM_SELLERS_IP is not one of the columns
# assembled into data_numsellers_IP, so gam() has to resolve it outside `data`
# (presumably through attach(modeldata)). The same fallback would apply to any
# predictor removed by the variance check -- confirm this is intended.
gam_numsellers_IP <- gam(
  NUM_SELLERS_IP ~
    incr5per_amzn_ip +
    mean_ans_questions_IP_lag1 +
    mean_buyboxprice_IP_lag1 +
    priceAmzn_IP_lag +
    mean_nonbuybox3p_price_IP +
    mean_salesrank_categ_IP_lag1 +
    mean_product_star_rate_IP_lag1 +
    weekend +
    seasonal_sale,
  family = gaussian(link = "identity"),
  data = data_numsellers_IP,
  method = "REML",
  optimizer = c("outer", "newton"),
  fit = TRUE
)

summary(gam_numsellers_IP)
